In [1]:
# Auto-report wall-clock time for every cell (autotime extension).
%reload_ext autotime
import pandas as pd
import requests
from pprint import pprint
import json
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor
from tqdm.auto import tqdm
# Render pandas plots with plotly instead of matplotlib.
pd.options.plotting.backend = "plotly"
# Show all columns and wide cell contents when displaying DataFrames.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", 100)
✔️ 4.72 s (2024-12-12T09:23:59/2024-12-12T09:24:03)
2024-12-12 09:24:02.300884: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 2024-12-12 09:24:02.313220: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered 2024-12-12 09:24:02.329137: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered 2024-12-12 09:24:02.334326: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered 2024-12-12 09:24:02.348072: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 2024-12-12 09:24:03.164700: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
In [13]:
# Load the survey results, then keep only the first row for each
# Street View panorama ID (duplicates arise when several survey points
# resolve to the same panorama).
results_raw = pd.read_csv("results.csv")
df = results_raw.drop_duplicates(subset="panoid")
df
✔️ 20.1 ms (2024-12-12T10:02:32/2024-12-12T10:02:32)
Out[13]:
| Index | pid | n | time | anxiousness | latitude | longitude | geometry | panoid | panolat | panolon | panodate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | P20001 | 1 | 2023-04-25T02:51:42Z | 0 | -36.924795 | 174.738044 | POINT (174.7380435 -36.92479483) | IvrcS0W1RlFAlnci-p39XA | -36.924665 | 174.737914 | 2012-04 |
| 10 | 10 | P20001 | 11 | 2023-04-24T00:42:25Z | 0 | -36.924837 | 174.737948 | POINT (174.7379477 -36.92483659) | QEpZV7bnO2mBfp0weMUKEg | -36.924730 | 174.737826 | 2012-04 |
| 13 | 13 | P20006 | 1 | 2023-06-03T02:45:55Z | 3 | -36.892203 | 174.740125 | POINT (174.7401253 -36.89220256) | omb98QNjTPWi0uUfMsmYeg | -36.892621 | 174.739961 | 2024-05 |
| 14 | 15 | P20009 | 2 | 2023-05-17T04:54:48Z | 3 | -36.923191 | 174.748620 | POINT (174.7486203 -36.92319093) | E7B5AV3DQ1rYWDClVRo8Zg | -36.923194 | 174.748831 | 2024-05 |
| 17 | 19 | P20009 | 6 | 2023-05-19T22:28:51Z | 1 | -36.923260 | 174.748655 | POINT (174.748655 -36.92325959) | KCTcsxYCIm41XdzkYEYUQw | -36.923286 | 174.748840 | 2024-05 |
| 19 | 21 | P20015 | 1 | 2023-05-17T07:34:00Z | 5 | -36.921603 | 174.747739 | POINT (174.747739 -36.92160252) | ESE0Slg2IO7Vf3QdBhETkg | -36.921626 | 174.747253 | 2024-05 |
| 22 | 24 | P20021 | 1 | 2023-06-03T03:55:41Z | 1 | -38.140714 | 176.251862 | POINT (176.2518616 -38.14071376) | AF1QipNiSoDDA2omwMtXrIq76eZTz6u7JXaIMz2lY2HN | -38.140783 | 176.251412 | 2017-04-01 |
| 23 | 25 | P20021 | 2 | 2023-06-04T02:33:49Z | 6 | -37.675727 | 175.209414 | POINT (175.2094142 -37.67572725) | _0GQKQIk42dFPo3qVzblaw | -37.675754 | 175.209423 | 2023-11 |
| 24 | 26 | P20021 | 3 | 2023-06-05T21:49:46Z | 3 | -36.894889 | 174.742775 | POINT (174.7427751 -36.89488899) | qgtMQGHZWUUIBCa8JgbBhA | -36.895076 | 174.742734 | 2024-05 |
| 25 | 27 | P20021 | 4 | 2023-06-06T02:29:11Z | 5 | -36.894854 | 174.742929 | POINT (174.7429285 -36.89485419) | T4yBf38jq472FmvtzEtI_w | -36.895101 | 174.742848 | 2024-05 |
| 26 | 30 | P20022 | 3 | 2023-04-25T06:42:09Z | 1 | -36.913380 | 174.731288 | POINT (174.7312875 -36.91337995) | do2cpZfBTwfxHkWnQkyL3A | -36.913440 | 174.731310 | 2024-07 |
| 27 | 31 | P20022 | 4 | 2023-04-25T22:31:15Z | 6 | -36.880662 | 174.707832 | POINT (174.7078325 -36.88066162) | AF1QipPN3SoaDBQHiLKu_Lej8k2CncLSg7gBWX-B6XEi | -36.880687 | 174.707800 | 2020-09-09 |
| 28 | 33 | P20022 | 6 | 2023-04-24T03:16:17Z | 4 | -36.852978 | 174.767267 | POINT (174.7672665 -36.85297814) | AF1QipMqMyDEii4FocJHe8Ni_YuVMbgNxZ7J6iN0NUnW | -36.852951 | 174.767188 | 2017-08-31 |
| 29 | 34 | P20027 | 1 | 2023-05-27T21:50:10Z | 6 | -36.892136 | 174.736943 | POINT (174.7369429 -36.89213617) | ody-NBwD6S0562GUtROqtg | -36.891996 | 174.737012 | 2024-06 |
| 36 | 41 | P20027 | 8 | 2023-05-30T21:17:36Z | 2 | -36.887537 | 174.736875 | POINT (174.7368754 -36.88753691) | AF1QipOAyOpW4qq51x-aNMdgBZUFedxBzEf4lh8NtdKV | -36.887436 | 174.737391 | 2024-12-04 |
| 38 | 43 | P20027 | 10 | 2023-06-01T01:04:35Z | 1 | -36.887221 | 174.736789 | POINT (174.7367892 -36.88722101) | Vy5UxGKwH8RxSoG2tFB94Q | -36.887653 | 174.737623 | 2024-06 |
| 39 | 44 | P20027 | 11 | 2023-06-01T05:24:54Z | 3 | -36.888974 | 174.735651 | POINT (174.7356508 -36.88897381) | ZHmWEeGCwOCHGbCNhNn3FQ | -36.888915 | 174.735645 | 2023-01 |
| 40 | 45 | P20027 | 12 | 2023-06-02T03:42:14Z | 3 | -36.887732 | 174.735789 | POINT (174.7357892 -36.88773177) | XhlNCQVpfaXvJCnb9PQDSg | -36.887963 | 174.735387 | 2023-01 |
| 45 | 50 | P20033 | 1 | 2023-05-03T08:19:03Z | 0 | -36.978477 | 174.830027 | POINT (174.8300269 -36.9784771) | tbfXbYFHITDw8p7vCFU3KA | -36.978564 | 174.830225 | 2022-08 |
| 47 | 52 | P20033 | 3 | 2023-05-04T02:17:26Z | 0 | -36.978365 | 174.830125 | POINT (174.8301251 -36.97836488) | BoCOn1VpFGrbXlyX3EKZ6g | -36.978467 | 174.830275 | 2022-08 |
| 48 | 54 | P20033 | 5 | 2023-05-05T00:55:39Z | 0 | -36.981856 | 174.833483 | POINT (174.8334831 -36.98185571) | AF1QipPUo-N8j6tJkZdqJHviaaD70cDheuapj22qHwI1 | -36.981880 | 174.832996 | 2018-04-16 |
In [3]:
# Loading this model needs about 22.69GB of GPU memory
# Loading this model needs about 22.69GB of GPU memory
model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # half-size weights; bf16 keeps fp32's exponent range
    device_map="auto",           # let accelerate place shards across available GPUs/CPU
)
# Paired processor: tokenizes text and prepares image tensors for the model.
processor = AutoProcessor.from_pretrained(model_id)
✔️ 13.6 s (2024-12-12T09:24:04/2024-12-12T09:24:17)
The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
Loading checkpoint shards: 0%| | 0/5 [00:00<?, ?it/s]
In [15]:
# Run the vision-LLM over a sample of panoramas and print the extracted
# JSON scene attributes for each one.
# The prompt does not depend on the row, so build it once outside the loop.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": """
This image is a panorama from Google Street View.
From the image, extract the following information, in JSON format:
green: Percentage of the image that is green space (e.g. parks, gardens, trees, grass etc.). A number from 0-100.
environment: Classify the nature of the environment in this image. Built up/green/residential/shops/cafes?. A string.
water: If you see any streams/ponds/rivers/ocean in the image, estimate the distance to the water in meters. A number. If there is no water, return 0.
obscured: Proportion of view obscured by buildings (how much of total line of sight is blocked by buildings in close proximity). A number from 0-100.
people: the number of people you see in the image
cars: the number of cars you see in the image
bikes: the number of bikes you see in the image
Do not include comments in your JSON response. Only respond with the JSON object. Make sure the JSON is valid.
"""},
            {"type": "image"},
        ]
    }
]
input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

SAMPLE_SIZE = 10
# total= gives tqdm a length (itertuples has none), so the bar renders properly.
for row in tqdm(df.sample(SAMPLE_SIZE).itertuples(index=False), total=SAMPLE_SIZE):
    panoid = row.panoid
    # Context manager closes the underlying file handle each iteration
    # (Image.open is lazy and would otherwise leak handles across the loop).
    with Image.open(f"panoramas/{panoid}.jpg") as image:
        display(image)
        inputs = processor(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt"
        ).to(model.device)
    # Retry a few times in case the model emits malformed JSON.
    # NOTE(review): generate() here appears to use greedy decoding (no
    # do_sample), so retries may be deterministic and identical — consider
    # enabling sampling if retries are meant to vary. TODO confirm.
    for retry in range(3):
        output = model.generate(**inputs, max_new_tokens=5000)
        result = processor.decode(output[0])
        # Keep only the assistant turn: text after the last header marker,
        # with the end-of-turn token stripped.
        result = result[result.rindex("<|end_header_id|>") + len("<|end_header_id|>"):].strip().replace("<|eot_id|>", "")
        print("Output:")
        try:
            result = json.loads(result)
            pprint(result)
            print("\n")
            break
        except json.JSONDecodeError:
            print(f"Unable to parse: {result}")
    else:
        # All attempts produced invalid JSON — say so instead of silently moving on.
        print(f"Giving up on {panoid}: no valid JSON after 3 attempts")
⌛ 1.73 µs (2024-12-12T10:03:19)
0it [00:00, ?it/s]
Output:
{'bikes': 0,
'cars': 4,
'environment': 'residential',
'green': 60,
'obscured': 40,
'people': 1,
'water': 0}
Output:
{'bikes': 0,
'cars': 0,
'environment': 'residential',
'green': 45,
'obscured': 0,
'people': 0,
'water': 0}